#!/usr/bin/env bash
# deepdive-corenlp-start -- Starts CoreNLP HTTP server for use with DeepDive
# $ deepdive corenlp start PORT
#
# $ export CORENLP_PORT=PORT
# $ deepdive corenlp start
##
set -euo pipefail

# Accept the port as the sole optional argument *before* validating the
# environment, so that `deepdive corenlp start PORT` works even when
# CORENLP_PORT has not been exported beforehand.
# NOTE(review): CORENLP_PORT_SET, which is consulted later when launching, is
# not updated here -- confirm whether an explicit PORT argument should also
# count as the port being "explicitly set".
[[ $# -le 1 ]] || usage "$0" "Too many arguments"
[[ $# -eq 0 ]] || CORENLP_PORT=$1  # command-line argument overrides CORENLP_PORT

# required settings: abort when any of these is unset or empty
: "${CORENLP_HOME:?}" "${CORENLP_RUNDIR:?}" "${CORENLP_PORT:?}"
export CORENLP_PORT

# optional settings and their defaults (quoted so values are never subject to
# word splitting or pathname expansion)
: "${CORENLP_TIMEOUT:=86400000}" # wait for a day to parse a single document and give up
: "${CORENLP_JAVAOPTS:=}"
: "${CORENLP_START_DELAY:=120}" # wait for a minute on average

deepdive-corenlp-installed

# default log locations for stdout and stderr
: "${CORENLP_LOG_OUT:=/dev/null}"  # ignore stdout by default as all text parsed is duplicated
: "${CORENLP_LOG_ERR:=$CORENLP_RUNDIR/server-port$CORENLP_PORT.log}" # stderr may be useful to keep

# PID file for the server on this port; the run directory must exist for it
pid_file="$CORENLP_RUNDIR"/server-port"$CORENLP_PORT".pid
mkdir -p "$CORENLP_RUNDIR"

# emulate a lock to elect a single process to actually launch CoreNLP when there's contention
# With noclobber on, `>` refuses to overwrite an existing file, so writing our
# PID to "$pid_file".launching only succeeds if that file does not exist yet.
set -o noclobber
{ echo $$ >"$pid_file".launching; } 2>/dev/null || {
    # The lock file already exists: read the PID recorded in it and check
    # with ps(1) whether that process is still alive.
    if pid=$(cat "$pid_file".launching 2>/dev/null) && ! ps -p "$pid" &>/dev/null; then
        # but take over stale locks
        set +o noclobber
        echo $$ >"$pid_file".launching
        # Random sub-second pause before the election check below; presumably
        # this lets other processes taking over the same stale lock finish
        # writing, so the last writer wins -- NOTE(review): confirm intent.
        sleep 0.$RANDOM
    fi
}
set +o noclobber
# We are elected only if the lock file ends up holding our own PID.
is_elected=true; [[ $(cat "$pid_file".launching 2>/dev/null) = $$ ]] || is_elected=false

# Leave an already running server alone; otherwise let the elected launcher
# clear out a PID file that no longer refers to a live server.
if [[ ! -s $pid_file ]]; then
    :  # no PID recorded, nothing to check
elif deepdive-corenlp-is-running &>/dev/null; then
    warning "CoreNLP server at CORENLP_PORT=$CORENLP_PORT already running (PID $(cat "$pid_file"))"
elif $is_elected; then
    # the recorded PID looks stale, so drop the file
    rm -f "$pid_file"
fi

# start CoreNLP server
# Only the elected process launches the JVM, and only when no PID has been
# recorded yet.  The launching lock is released afterwards in either case.
if $is_elected; then
    if ! [[ -s "$pid_file" ]]; then
        echo >&2 "CoreNLP server at CORENLP_PORT=$CORENLP_PORT starting..."
        # ensure we can create log files and PID files
        mkdir -p "$(dirname "$CORENLP_LOG_OUT")" "$(dirname "$CORENLP_LOG_ERR")"
        touch "$pid_file" "$CORENLP_LOG_OUT" "$CORENLP_LOG_ERR"
        # Optional command prefix for the JVM, kept as an array so that the
        # backgrounded command below execs straight through to java and $!
        # is the server's own PID (a shell function used as the prefix would
        # put a bash subshell in between, and its PID would be recorded).
        launch_with=()
        # XXX it doesn't seem like a good idea to launch many CoreNLP servers,
        # but if that happens, e.g., by a UDF running in parallel, below will
        # cope with the thrashing issue in many CoreNLP servers loading at once
        # unless a particular CoreNLP server port is being explicitly set
        # or CoreNLP server is started outside of DeepDive's UDF
        if ! $CORENLP_PORT_SET && [[ -n ${DEEPDIVE_CURRENT_PROCESS_INDEX:-} ]]; then
            # XXX CoreNLP ObjectInputStream uses all cores while loading models no matter what -t flag we pass
            # which can be devastating when multiple UDFs try to launch many servers at once
            # so we must cope with the impact of simultaneous loading somehow
            if type taskset &>/dev/null; then
                # On Linux, limit number of cores JVM can see with taskset(1):
                # pin to the single core numbered (process index - 1)
                launch_with=(taskset -c "$(( DEEPDIVE_CURRENT_PROCESS_INDEX - 1 ))")
            fi
        fi
        # now, actually launch the JVM running CoreNLP server
        # (the ${arr[@]+...} form keeps an empty array safe under `set -u` on
        # older bash; CORENLP_JAVAOPTS is deliberately left unquoted so that
        # it splits into separate JVM options)
        # shellcheck disable=SC2086
        ${launch_with[@]+"${launch_with[@]}"} nohup java ${CORENLP_JAVAOPTS} -cp "$CORENLP_HOME/*" \
            edu.stanford.nlp.pipeline.StanfordCoreNLPServer \
            --port "$CORENLP_PORT" --timeout "$CORENLP_TIMEOUT" \
            </dev/null >"$CORENLP_LOG_OUT" 2>"$CORENLP_LOG_ERR" &
        # record the PID of the background job just started
        echo $! >"$pid_file"
    fi
    # release the launching lock
    rm -f "$pid_file".launching
fi

# wait for CoreNLP server to boot up
# CORENLP_START_DELAY is the maximum number of polls; each poll sleeps a
# random 0.1 to 0.9 seconds before checking again.
export CORENLP_START_DELAY
CORENLP_SERVER_ENDPOINT=$(deepdive-corenlp-server-url)
export CORENLP_SERVER_ENDPOINT
until deepdive-corenlp-is-running &>/dev/null; do
    sleep "0.$((1 + RANDOM % 9))"
    # Give up once the budget is used up.  Comparing with `> 0` (rather than
    # testing for exactly zero) also stops the loop when the delay was
    # configured as 0 or a negative number, which would otherwise never
    # reach zero by decrementing.
    (( --CORENLP_START_DELAY > 0 )) ||
        error "CoreNLP server at CORENLP_PORT=$CORENLP_PORT still not ready"
done
echo >&2 "CoreNLP server at CORENLP_PORT=$CORENLP_PORT ready."
echo >&2 "To stop it after final use, run: deepdive corenlp stop"
echo >&2 "To watch its log, run: deepdive corenlp watch-log"
